import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Input and output locations.
root = "/media/hsmy/wanghao_18T/biology/mobivision/outs_vdj/240227V-S-SJJ-V1T_S40_L001_outs/"
input_csv = root + "240227V-S-SJJ-V1T_S40_L001_airr_rearrangement.tsv"
output_fasta = "/home/hsmy/mob/tcr/consensus.fasta"

# Read the MobiVision output table (tab-separated AIRR rearrangement file).
df = pd.read_csv(input_csv, sep='\t')

# Build the 10x-style FASTA records: one ">sequence_id" header line followed
# by the sequence on a single line.
# NOTE: an earlier variant built Bio.SeqRecord objects with ids of the form
# "<cell_id>_contig_<row index>" and wrote them via SeqIO.write; the plain-text
# writer below replaced it, which is why the Bio imports are currently unused.
sequences = []
skipped = 0
# Iterate over just the two needed columns instead of df.iterrows(), which
# materialises a full Series per row.
for seq_id, seq in zip(df["sequence_id"], df["sequence_alignment"]):
    # Empty TSV cells are loaded by pandas as NaN (a float); calling
    # .replace() on them would raise AttributeError, and a missing id would
    # yield a bogus ">nan" header. Skip such rows instead of crashing.
    if pd.isna(seq_id) or pd.isna(seq):
        skipped += 1
        continue
    # Strip embedded newlines so each sequence occupies exactly one line.
    seq = str(seq).replace("\n", "")
    sequences.append(f">{seq_id}\n{seq}\n")

# The context manager guarantees the file is flushed and closed even if a
# write fails; plain "w" is enough because the file is never read back.
with open(output_fasta, mode="w", encoding="utf-8") as fasta_file:
    fasta_file.writelines(sequences)

# Make dropped rows visible to the user rather than discarding them silently.
if skipped:
    print(f"跳过 {skipped} 条缺少 sequence_id 或 sequence_alignment 的记录")

print("转换完成")
